blktap: track frontend connections in a hash table backed by a slab cache (restoring multi-blkif support), convert the rings to the new io/ring.h macros, and service frontend requests from a blkback-style scheduler thread ("xentapd").
DPRINTK(" tap - Frontend connection init:\n");
active_reqs_init();
+ blkif_interface_init();
+ blkdev_schedule_init();
- ptfe_blkif.status = DISCONNECTED;
-
(void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
CALLBACK_IN_BLOCKING_CONTEXT);
#include <asm/pgalloc.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
+
+/* Used to signal to the backend that this is a tap domain. */
+#define BLKTAP_COOKIE 0xbeadfeed
/* -------[ debug / pretty printing ]--------------------------------- */
#if 0
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 1
#define ASSERT(_p) \
if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
__LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
#else
#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
#endif
#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-/* -------[ connection / request tracking ]--------------------------- */
+
+/* -------[ connection tracking ]------------------------------------- */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
typedef struct blkif_st {
/* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
+ domid_t domid;
+ unsigned int handle;
/* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- int irq;
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
/* Comms information. */
- blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
- BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
- BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
+ blkif_back_ring_t blk_ring;
enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
/*
* DISCONNECT response is deferred until pending requests are ack'ed.
* We therefore need to store the id from the original request.
- */ u8 disconnect_rspid;
- struct blkif_st *hash_next;
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
-
+ */
+ u8 disconnect_rspid;
+ struct blkif_st *hash_next;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
struct work_struct work;
} blkif_t;
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_disconnect_complete(blkif_t *blkif);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ blkif_disconnect_complete(_b); \
+ } while (0)
+
+
+/* -------[ active request tracking ]--------------------------------- */
+
typedef struct {
blkif_t *blkif;
unsigned long id;
unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int next_free;
+ int inuse; /* debugging */
} active_req_t;
+typedef unsigned int ACTIVE_RING_IDX;
-/* -------[ block ring structs ]-------------------------------------- */
-
-/* Types of ring. */
-#define BLKIF_REQ_RING_TYPE 1
-#define BLKIF_RSP_RING_TYPE 2
-
-/* generic ring struct. */
-typedef struct blkif_generic_ring_struct {
- int type;
-} blkif_generic_ring_t;
-
-/* A requestor's view of a ring. */
-typedef struct blkif_req_ring_struct {
-
- int type; /* Will be BLKIF_REQ_RING_TYPE */
- BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */
- BLKIF_RING_IDX rsp_cons; /* Response consumer index */
- blkif_ring_t *ring; /* Pointer to shared ring struct */
-
-} blkif_req_ring_t;
-
-#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
-
-/* A responder's view of a ring. */
-typedef struct blkif_rsp_ring_struct {
-
- int type;
- BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */
- BLKIF_RING_IDX req_cons; /* Request consumer index */
- blkif_ring_t *ring; /* Pointer to shared ring struct */
-
-} blkif_rsp_ring_t;
-
-#define BLKIF_RSP_RING_INIT = { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
-
-#define RING(a) (blkif_generic_ring_t *)(a)
-
-inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
+inline unsigned int ID_TO_IDX(unsigned long id);
+inline domid_t ID_TO_DOM(unsigned long id);
+inline void active_reqs_init(void);
/* -------[ interposition -> character device interface ]------------- */
#define BLKTAP_IOCTL_KICK_FE 1
#define BLKTAP_IOCTL_KICK_BE 2
#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_PRINT_IDXS 100
/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
#define RING_PAGES 128
extern unsigned long rings_vstart;
-/* -------[ Here be globals ]----------------------------------------- */
+/* -------[ Here be globals ]----------------------------------------- */
extern unsigned long blktap_mode;
-
-/* blkif struct, containing ring to FE domain */
-extern blkif_t ptfe_blkif;
-
/* Connection to a single backend domain. */
-extern blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */
-extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
-extern BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */
-
-/* Rings up to user space. */
-extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
-extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
+extern blkif_front_ring_t blktap_be_ring;
/* Event channel to backend domain. */
extern unsigned int blkif_ptbe_evtchn;
/* init function for character device interface. */
int blktap_init(void);
+/* init function for the blkif cache. */
+void __init blkif_interface_init(void);
+void __init blkdev_schedule_init(void);
+void blkif_deschedule(blkif_t *blkif);
+
/* interfaces to the char driver, passing messages to and from apps. */
void blktap_kick_user(void);
-int blktap_write_to_ring(blkif_request_t *req);
-
/* user ring access functions: */
int blktap_write_fe_ring(blkif_request_t *req);
int blktap_read_fe_ring(void);
int blktap_read_be_ring(void);
-/* and the helpers they call: */
-inline int write_resp_to_fe_ring(blkif_response_t *rsp);
-inline void kick_fe_domain(void);
+/* fe/be ring access functions: */
+int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
+int write_req_to_be_ring(blkif_request_t *req);
-inline int write_req_to_be_ring(blkif_request_t *req);
+/* event notification functions */
+inline void kick_fe_domain(blkif_t *blkif);
inline void kick_be_domain(void);
/* Interrupt handlers. */
/* Control message receiver. */
extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+/* debug */
+void print_vm_ring_idxs(void);
+
#define __BLKINT_H__
#endif
/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static kmem_cache_t *blkif_cachep;
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+/* Walk the hash chain for (domid, handle); returns NULL if no match. */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif != NULL) &&
+            ((blkif->domid != domid) || (blkif->handle != handle)) )
+        blkif = blkif->hash_next;
+    return blkif;
+}
+
+/*
+ * Deferred teardown of a disconnecting interface.  Runs in process
+ * context off the work queue (scheduled by blkif_disconnect_complete())
+ * once the final reference to the blkif has been dropped.
+ */
+static void __blkif_disconnect_complete(void *arg)
+{
+    blkif_t *blkif = (blkif_t *)arg;
+    ctrl_msg_t cmsg;
+    blkif_be_disconnect_t disc;
+
+    /*
+     * These can't be done in blkif_disconnect() because at that point there
+     * may be outstanding requests at the disc whose asynchronous responses
+     * must still be notified to the remote driver.
+     */
+    unbind_evtchn_from_irq(blkif->evtchn);
+    vfree(blkif->blk_ring.sring);
+
+    /* Construct the deferred response message. */
+    cmsg.type = CMSG_BLKIF_BE;
+    cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+    cmsg.id = blkif->disconnect_rspid;
+    cmsg.length = sizeof(blkif_be_disconnect_t);
+    disc.domid = blkif->domid;
+    disc.blkif_handle = blkif->handle;
+    disc.status = BLKIF_BE_STATUS_OKAY;
+    memcpy(cmsg.msg, &disc, sizeof(disc));
+
+    /*
+     * Make sure message is constructed /before/ status change, because
+     * after the status change the 'blkif' structure could be deallocated at
+     * any time. Also make sure we send the response /after/ status change,
+     * as otherwise a subsequent CONNECT request could spuriously fail if
+     * another CPU doesn't see the status change yet.
+     */
+    mb();
+    if ( blkif->status != DISCONNECTING )
+        BUG();
+    blkif->status = DISCONNECTED;
+    mb();
+
+    /* Send the successful response. */
+    ctrl_if_send_response(&cmsg);
+}
+
+/*
+ * Called from blkif_put() when the refcount hits zero.  Teardown is
+ * deferred to the work queue because it calls vfree(), which must not
+ * run in interrupt context.
+ */
+void blkif_disconnect_complete(blkif_t *blkif)
+{
+    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
+    schedule_work(&blkif->work);
+}
void blkif_ptfe_create(blkif_be_create_t *create)
{
- blkif_t *blkif;
+ blkif_t *blkif, **pblkif;
domid_t domid = create->domid;
unsigned int handle = create->blkif_handle;
/* May want to store info on the connecting domain here. */
DPRINTK("PT got BE_CREATE\n");
- blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
+
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+ {
+ DPRINTK("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
/* blkif struct init code from blkback.c */
memset(blkif, 0, sizeof(*blkif));
blkif->domid = domid;
blkif->handle = handle;
- blkif->status = DISCONNECTED;
+ blkif->status = DISCONNECTED;
spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 0);
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ DPRINTK("Could not create blkif: already exists\n");
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+
+ blkif->hash_next = *pblkif;
+ *pblkif = blkif;
+
create->status = BLKIF_BE_STATUS_OKAY;
}
{
/* Clear anything that we initialized above. */
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
DPRINTK("PT got BE_DESTROY\n");
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif = *pblkif) != NULL )
+ {
+ if ( (blkif->domid == domid) && (blkif->handle == handle) )
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pblkif = &blkif->hash_next;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pblkif = blkif->hash_next;
+ kmem_cache_free(blkif_cachep, blkif);
destroy->status = BLKIF_BE_STATUS_OKAY;
}
void blkif_ptfe_connect(blkif_be_connect_t *connect)
{
- domid_t domid = connect->domid;
- /*unsigned int handle = connect->blkif_handle;*/
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
struct vm_struct *vma;
- pgprot_t prot;
- int error;
- blkif_t *blkif;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+ blkif_sring_t *sring;
DPRINTK("PT got BE_CONNECT\n");
- blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
{
return;
}
+ sring = (blkif_sring_t *)vma->addr;
+ SHARED_RING_INIT(BLKIF_RING, sring);
+ BACK_RING_INIT(BLKIF_RING, &blkif->blk_ring, sring);
+
blkif->evtchn = evtchn;
blkif->irq = bind_evtchn_to_irq(evtchn);
blkif->shmem_frame = shmem_frame;
- blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
blkif->status = CONNECTED;
- /*blkif_get(blkif);*/
+ blkif_get(blkif);
request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
connect->status = BLKIF_BE_STATUS_OKAY;
}
-void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
+int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
{
- /*
- * don't actually set the passthrough to disconnected.
- * We just act as a pipe, and defer to the real ends to handle things like
- * recovery.
- */
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->blkif_handle;
+ blkif_t *blkif;
DPRINTK("PT got BE_DISCONNECT\n");
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_disconnect attempted for non-existent blkif"
+ " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
+ disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( blkif->status == CONNECTED )
+ {
+ blkif->status = DISCONNECTING;
+ blkif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(blkif->irq, blkif);
+ blkif_deschedule(blkif);
+ blkif_put(blkif);
+ return 0; /* Caller should not send response message. */
+ }
disconnect->status = BLKIF_BE_STATUS_OKAY;
- return;
+ return 1;
}
/*-----[ Control Messages to/from Backend VM ]----------------------------*/
};
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
+ msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
/* Move from CLOSED to DISCONNECTED state. */
static void blkif_ptbe_disconnect(void)
{
- blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
- blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod
- = ptbe_resp_cons = ptbe_req_prod = 0;
+ blkif_sring_t *sring;
+
+ sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(BLKIF_RING, sring);
+ FRONT_RING_INIT(BLKIF_RING, &blktap_be_ring, sring);
blkif_pt_state = BLKIF_STATE_DISCONNECTED;
DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
blkif_ptbe_send_interface_connect();
case CMSG_BLKIF_BE_DISCONNECT:
if ( msg->length != sizeof(blkif_be_disconnect_t) )
goto parse_error;
- blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
+ if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
+ msg->id) )
+ return;
break;
/* We just ignore anything to do with vbds for now. */
msg->length = 0;
ctrl_if_send_response(msg);
}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+/* Set up the blkif slab cache and clear the connection hash table. */
+void __init blkif_interface_init(void)
+{
+    /* NOTE(review): kmem_cache_create() return value is not checked --
+     * a NULL cache would fault later in blkif_ptfe_create(). */
+    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+                                     0, 0, NULL, NULL);
+    memset(blkif_hash, 0, sizeof(blkif_hash));
+}
* Block request routing data path.
*
* Copyright (c) 2004, Andrew Warfield
- *
+ * -- see full header in blktap.c
*/
#include "blktap.h"
+#include <asm-xen/evtchn.h>
/*-----[ The data paths ]-------------------------------------------------*/
-
-/* Connections to the frontend domains.*/
-blkif_t ptfe_blkif;
-
-/* Connection to a single backend domain. */
-blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */
-BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
-BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */
-/* Rings up to user space. */
-blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
-blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
-
-/*-----[ Ring helpers ]---------------------------------------------------*/
-
-inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
-{
- if (ring->type == BLKIF_REQ_RING_TYPE) {
- blkif_req_ring_t *r = (blkif_req_ring_t *)ring;
- return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE );
- }
-
- /* for now assume that there is always room in the response path. */
- return 0;
-}
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
/*-----[ Tracking active requests ]---------------------------------------*/
/* this must be the same as MAX_PENDING_REQS in blkback.c */
-#define MAX_ACTIVE_REQS 64
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
-active_req_t active_reqs[MAX_ACTIVE_REQS];
-unsigned char active_req_ring[MAX_ACTIVE_REQS];
-spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
-typedef unsigned int ACTIVE_RING_IDX;
-ACTIVE_RING_IDX active_prod, active_cons;
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX active_prod, active_cons;
#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
inline active_req_t *get_active_req(void)
{
- ASSERT(active_cons != active_prod);
- return &active_reqs[MASK_ACTIVE_IDX(active_cons++)];
+ ACTIVE_RING_IDX idx;
+ active_req_t *ar;
+ unsigned long flags;
+
+ ASSERT(active_cons != active_prod);
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+ ar = &active_reqs[idx];
+if (ar->inuse) WPRINTK("AR INUSE! (%lu)\n", ar->id);
+ar->inuse = 1;
+ spin_unlock_irqrestore(&active_req_lock, flags);
+
+ return ar;
}
inline void free_active_req(active_req_t *ar)
unsigned long flags;
spin_lock_irqsave(&active_req_lock, flags);
+ar->inuse = 0;
active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
spin_unlock_irqrestore(&active_req_lock, flags);
}
+/* Map an active-ring index to its request record.
+ * NOTE(review): idx is not range-checked against MAX_ACTIVE_REQS;
+ * callers must pass a value obtained via ID_TO_IDX() on a valid id. */
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+    return &active_reqs[idx];
+}
+
inline void active_reqs_init(void)
{
ACTIVE_RING_IDX i;
active_req_ring[i] = i;
}
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message. In it we put the AR index _AND_ the fe domid.
+ * The domid is used by the backend to map the pages properly.
+ */
+
+/* Pack the frontend domid (high 16 bits) with the AR index (low 16 bits). */
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+    return ( (fe_dom << 16) | idx );
+}
+
+/* Recover the active-request index from a hijacked id. */
+inline unsigned int ID_TO_IDX(unsigned long id)
+{
+    return ( id & 0x0000ffff );
+}
+
+/* Recover the frontend domid from a hijacked id. */
+inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+/*
+ * Copy a backend response onto the originating frontend's shared ring.
+ * The hijacked id is translated back to the frontend's original id and
+ * the active request record is released.  Only the private producer
+ * index is advanced; kick_fe_domain() publishes it to the shared ring.
+ */
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+    blkif_response_t *resp_d;
+    active_req_t *ar;
+
+    /* remap id, and free the active req. blkif lookup goes here too.*/
+    ar = &active_reqs[ID_TO_IDX(rsp->id)];
+    /* WPRINTK("%3u > %3lu\n", ID_TO_IDX(rsp->id), ar->id); */
+    rsp->id = ar->id;
+    free_active_req(ar);
+
+    resp_d = RING_GET_RESPONSE(BLKIF_RING, &blkif->blk_ring,
+                               blkif->blk_ring.rsp_prod_pvt);
+    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+    wmb();
+    blkif->blk_ring.rsp_prod_pvt++;
+
+    return 0;
+}
+
+/*
+ * Copy a (possibly modified) frontend request onto the ring to the
+ * backend domain.  Only the private producer index is advanced; the
+ * request becomes visible to the backend via kick_be_domain().
+ */
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *req_d;
+
+    req_d = RING_GET_REQUEST(BLKIF_RING, &blktap_be_ring,
+                             blktap_be_ring.req_prod_pvt);
+    memcpy(req_d, req, sizeof(blkif_request_t));
+    wmb();
+    blktap_be_ring.req_prod_pvt++;
+
+    return 0;
+}
+
+/* Publish queued responses to the frontend and notify it over its
+ * event channel. */
+inline void kick_fe_domain(blkif_t *blkif)
+{
+    RING_PUSH_RESPONSES(BLKIF_RING, &blkif->blk_ring);
+    notify_via_evtchn(blkif->evtchn);
+    DPRINTK("notified FE(dom %u)\n", blkif->domid);
+
+}
+
+/* Publish queued requests to the backend domain and notify it. */
+inline void kick_be_domain(void)
+{
+    wmb(); /* Ensure that the frontend can see the requests. */
+    RING_PUSH_REQUESTS(BLKIF_RING, &blktap_be_ring);
+    notify_via_evtchn(blkif_ptbe_evtchn);
+    DPRINTK("notified BE\n");
+}
+
/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+/*-----[ Scheduler list maint -from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+/* An interface is on the schedule list iff its list node's next pointer
+ * is non-NULL (it is cleared on removal). */
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+/* Take blkif off the schedule list and drop the list's reference.
+ * The unlocked pre-check is an optimisation; membership is re-tested
+ * under the lock before the actual removal. */
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+/* Queue blkif at the tail of the schedule list (CONNECTED interfaces
+ * only), taking a reference that is dropped again on removal. */
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+/*
+ * Body of the "xentapd" kernel thread: sleep until there is both work
+ * on the schedule list and room in the active-request ring, then
+ * service interfaces round-robin, up to BATCH_PER_DOMAIN requests per
+ * interface per pass (an interface with leftover work is re-queued).
+ */
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t *blkif;
+    struct list_head *ent;
+
+    daemonize(
+        "xentapd"
+        );
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif);
+            remove_from_blkdev_list(blkif);
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+        /* Push the batch through to disc. */
+        run_task_queue(&tq_disk);
+#endif
+    }
+}
+
+/* Wake the scheduler thread if there is queued work it could service. */
+static void maybe_trigger_blkio_schedule(void)
+{
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
+         !list_empty(&blkio_schedule_list) )
+        wake_up(&blkio_schedule_wait);
+}
+
+/* Stop scheduling a (disconnecting) interface. */
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+/* Initialise the schedule list/lock and start the "xentapd" thread. */
+void __init blkdev_schedule_init(void)
+{
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+}
+
+/*-----[ Interrupt entry from a frontend ]------ */
+
+/*
+ * Interrupt from a frontend: just queue the interface for the scheduler
+ * thread; request processing happens later in do_block_io_op().
+ */
irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blkio_schedule();
+    return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
/* we have pending messages from the real frontend. */
- blkif_request_t *req_s, *req_d;
- BLKIF_RING_IDX fe_rp;
+ blkif_request_t *req_s;
+ RING_IDX i, rp;
unsigned long flags;
- int notify;
- unsigned long i;
active_req_t *ar;
+ int more_to_do = 0;
+ int notify_be = 0, notify_user = 0;
DPRINTK("PT got FE interrupt.\n");
+
+ if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
/* lock both rings */
spin_lock_irqsave(&blkif_io_lock, flags);
- /* While there are REQUESTS on FERing: */
- fe_rp = ptfe_blkif.blk_ring_base->req_prod;
+ rp = blkif->blk_ring.sring->req_prod;
rmb();
- notify = (ptfe_blkif.blk_req_cons != fe_rp);
-
- for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
-
- /* Get the next request */
- req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
+
+ for ( i = blkif->blk_ring.req_cons;
+ (i != rp) &&
+ !RING_REQUEST_CONS_OVERFLOW(BLKIF_RING, &blkif->blk_ring, i);
+ i++ )
+ {
+
+ if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
+ {
+ more_to_do = 1;
+ break;
+ }
+ req_s = RING_GET_REQUEST(BLKIF_RING, &blkif->blk_ring, i);
/* This is a new request:
* Assign an active request record, and remap the id.
*/
ar = get_active_req();
ar->id = req_s->id;
- req_s->id = ACTIVE_IDX(ar);
- DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
+ ar->blkif = blkif;
+ req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+ /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
/* FE -> BE interposition point is here. */
/* ------------------------------------------------------------- */
/* BLKIF_OP_PROBE_HACK: */
- /* Until we have grant tables, we need to allow the backent to */
- /* map pages that are either from this domain, or more commonly */
- /* from the real front end. We achieve this in a terrible way, */
- /* by passing the front end's domid allong with PROBE messages */
- /* Once grant tables appear, this should all go away. */
+ /* Signal to the backend that we are a tap domain. */
if (req_s->operation == BLKIF_OP_PROBE) {
- DPRINTK("Adding FE domid to PROBE request.\n");
- (domid_t)(req_s->frame_and_sects[1]) = ptfe_blkif.domid;
+ DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+ req_s->frame_and_sects[1] = BLKTAP_COOKIE;
}
/* ------------------------------------------------------------- */
/* In MODE_INTERCEPT_FE, map attached pages into the app vma */
/* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
- /* XXX: mapping/copying of attached pages is still not done! */
-
DPRINTK("req->UFERing\n");
blktap_write_fe_ring(req_s);
-
-
+ notify_user = 1;
}
/* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
/* copy the request message to the BERing */
DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
- (unsigned)MASK_BLKIF_IDX(i),
- (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
-
- req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
+ (unsigned)__SHARED_RING_MASK(BLKIF_RING,
+ blktap_be_ring.sring, i),
+ (unsigned)__SHARED_RING_MASK(BLKIF_RING,
+ blktap_be_ring.sring, blktap_be_ring.req_prod_pvt));
- memcpy(req_d, req_s, sizeof(blkif_request_t));
-
- ptbe_req_prod++;
- }
- }
-
- ptfe_blkif.blk_req_cons = i;
-
- /* If we have forwarded any responses, notify the appropriate ends. */
- if (notify) {
-
- /* we have sent stuff to the be, notify it. */
- if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
- (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
- wmb();
- blk_ptbe_ring->req_prod = ptbe_req_prod;
-
- notify_via_evtchn(blkif_ptbe_evtchn);
- DPRINTK(" -- and notified.\n");
- }
-
- /* we sent stuff to the app, notify it. */
- if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
- (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-
- blktap_kick_user();
+ write_req_to_be_ring(req_s);
+ notify_be = 1;
}
}
+ blkif->blk_ring.req_cons = i;
+
/* unlock rings */
spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- return IRQ_HANDLED;
-}
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
- blkif_request_t *req_d;
-
- req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
- memcpy(req_d, req, sizeof(blkif_request_t));
- ptbe_req_prod++;
-
- return 0;
-}
-
-inline void kick_be_domain(void) {
- wmb();
- blk_ptbe_ring->req_prod = ptbe_req_prod;
- notify_via_evtchn(blkif_ptbe_evtchn);
+
+ if (notify_user)
+ blktap_kick_user();
+ if (notify_be)
+ kick_be_domain();
+
+ return more_to_do;
}
/*-----[ Data to/from Backend (server) VM ]------------------------------*/
irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
struct pt_regs *ptregs)
{
- blkif_response_t *resp_s, *resp_d;
- BLKIF_RING_IDX be_rp;
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ RING_IDX rp, i;
unsigned long flags;
- int notify;
- unsigned long i;
- active_req_t *ar;
DPRINTK("PT got BE interrupt.\n");
/* lock both rings */
spin_lock_irqsave(&blkif_io_lock, flags);
- /* While there are RESPONSES on BERing: */
- be_rp = blk_ptbe_ring->resp_prod;
+ rp = blktap_be_ring.sring->rsp_prod;
rmb();
- notify = (ptbe_resp_cons != be_rp);
-
- for ( i = ptbe_resp_cons; i != be_rp; i++ )
+
+ for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
{
- /* BE -> FE interposition point is here. */
+ resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_be_ring, i);
- /* Get the next response */
- resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
+ /* BE -> FE interposition point is here. */
-
+ blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+
/* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
(blktap_mode & BLKTAP_MODE_COPY_BE) ) {
/* In MODE_INTERCEPT_BE, map attached pages into the app vma */
/* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
- /* XXX: copy/map the attached page! */
-
DPRINTK("rsp->UBERing\n");
blktap_write_be_ring(resp_s);
+ blktap_kick_user();
}
/* Copy the response message to FERing */
DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
- (unsigned) MASK_BLKIF_IDX(i),
- (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
-
- /* remap id, and free the active req. blkif lookup goes here too.*/
- ar = &active_reqs[resp_s->id];
- DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
- resp_s->id = ar->id;
- free_active_req(ar);
-
- resp_d = &ptfe_blkif.blk_ring_base->ring[
- MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
-
- memcpy(resp_d, resp_s, sizeof(blkif_response_t));
-
- ptfe_blkif.blk_resp_prod++;
+ (unsigned)__SHARED_RING_MASK(BLKIF_RING,
+ blkif->blk_ring.sring, i),
+ (unsigned)__SHARED_RING_MASK(BLKIF_RING,
+ blkif->blk_ring.sring,
+ blkif->blk_ring.rsp_prod_pvt));
- }
- }
-
- ptbe_resp_cons = i;
-
- /* If we have forwarded any responses, notify the apropriate domains. */
- if (notify) {
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
- /* we have sent stuff to the fe. notify it. */
- if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
- (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
- wmb();
- ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
-
- notify_via_evtchn(ptfe_blkif.evtchn);
- DPRINTK(" -- and notified.\n");
- }
-
- /* we sent stuff to the app, notify it. */
- if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
- (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
-
- blktap_kick_user();
}
}
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- return IRQ_HANDLED;
-}
-
-inline int write_resp_to_fe_ring(blkif_response_t *rsp)
-{
- blkif_response_t *resp_d;
- active_req_t *ar;
- /* remap id, and free the active req. blkif lookup goes here too.*/
- ar = &active_reqs[rsp->id];
- DPRINTK("%3lu > %3lu\n", rsp->id, ar->id);
- rsp->id = ar->id;
- free_active_req(ar);
-
- resp_d = &ptfe_blkif.blk_ring_base->ring[
- MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
-
- memcpy(resp_d, rsp, sizeof(blkif_response_t));
- ptfe_blkif.blk_resp_prod++;
-
- return 0;
-}
-
-inline void kick_fe_domain(void) {
- wmb();
- ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
- notify_via_evtchn(ptfe_blkif.evtchn);
+ blktap_be_ring.rsp_cons = i;
-}
-
-static inline void flush_requests(void)
-{
- wmb(); /* Ensure that the frontend can see the requests. */
- blk_ptbe_ring->req_prod = ptbe_req_prod;
- notify_via_evtchn(blkif_ptbe_evtchn);
-}
-
-/*-----[ Data to/from user space ]----------------------------------------*/
-
-
-int blktap_write_fe_ring(blkif_request_t *req)
-{
- blkif_request_t *target;
- int error, i;
-
- /*
- * This is called to pass a request from the real frontend domain's
- * blkif ring to the character device.
- */
-
- if ( ! blktap_ring_ok ) {
- DPRINTK("blktap: fe_ring not ready for a request!\n");
- return 0;
- }
-
- if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
- DPRINTK("blktap: fe_ring is full, can't add.\n");
- return 0;
- }
-
- target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
- memcpy(target, req, sizeof(*req));
-
-/* maybe move this stuff out into a seperate func ------------------- */
-
- /*
- * For now, map attached page into a fixed position into the vma.
- * XXX: make this map to a free page.
- */
-
- /* Attempt to map the foreign pages directly in to the application */
- for (i=0; i<target->nr_segments; i++) {
-
- /* get an unused virtual address from the char device */
- /* store the old page address */
- /* replace the address with the virtual address */
-
- /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */
-
- error = direct_remap_area_pages(blktap_vma->vm_mm,
- MMAP_VADDR(req->id, i),
- target->frame_and_sects[0] & PAGE_MASK,
- PAGE_SIZE,
- blktap_vma->vm_page_prot,
- ptfe_blkif.domid);
- if ( error != 0 ) {
- printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
- return 0;
- }
- }
- /* fix the address of the attached page in the message. */
- /* TODO: preserve the segment number stuff here... */
- /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/
-/* ------------------------------------------------------------------ */
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
- fe_ring.req_prod++;
-
- return 0;
-}
-
-int blktap_write_be_ring(blkif_response_t *rsp)
-{
- blkif_response_t *target;
-
- /*
- * This is called to pass a request from the real backend domain's
- * blkif ring to the character device.
- */
-
- if ( ! blktap_ring_ok ) {
- DPRINTK("blktap: be_ring not ready for a request!\n");
- return 0;
- }
-
- if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
- DPRINTK("blktap: be_ring is full, can't add.\n");
- return 0;
- }
-
- target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
- memcpy(target, rsp, sizeof(*rsp));
-
-
- /* XXX: map attached pages and fix-up addresses in the copied address. */
-
- be_ring.rsp_prod++;
-
- return 0;
+ return IRQ_HANDLED;
}
-int blktap_read_fe_ring(void)
-{
- /* This is called to read responses from the UFE ring. */
-
- BLKIF_RING_IDX fe_rp;
- unsigned long i;
- int notify;
-
- DPRINTK("blktap_read_fe_ring()\n");
-
- fe_rp = fe_ring.ring->resp_prod;
- rmb();
- notify = (fe_rp != fe_ring.rsp_cons);
-
- /* if we are forwarding from UFERring to FERing */
- if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
- /* for each outstanding message on the UFEring */
- for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) {
+/* Debug : print the current ring indices. */
- /* XXX: remap pages on that message as necessary */
- /* copy the message to the UBEring */
-
- DPRINTK("resp->fe_ring\n");
- write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp);
- }
-
- fe_ring.rsp_cons = fe_rp;
-
- /* notify the fe if necessary */
- if ( notify ) {
- DPRINTK("kick_fe_domain()\n");
- kick_fe_domain();
- }
- }
-
- return 0;
-}
-
-int blktap_read_be_ring(void)
+void print_vm_ring_idxs(void)
{
- /* This is called to read responses from the UBE ring. */
-
- BLKIF_RING_IDX be_rp;
- unsigned long i;
- int notify;
-
- DPRINTK("blktap_read_be_ring()\n");
-
- be_rp = be_ring.ring->req_prod;
- rmb();
- notify = (be_rp != be_ring.req_cons);
-
- /* if we are forwarding from UFERring to FERing */
- if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
-
- /* for each outstanding message on the UFEring */
- for ( i = be_ring.req_cons; i != be_rp; i++ ) {
-
- /* XXX: remap pages on that message as necessary */
- /* copy the message to the UBEring */
-
- DPRINTK("req->be_ring\n");
- write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req);
- }
-
- be_ring.req_cons = be_rp;
-
- /* notify the fe if necessary */
- if ( notify ) {
- DPRINTK("kick_be_domain()\n");
- kick_be_domain();
- }
+ int i;
+ blkif_t *blkif;
+
+ WPRINTK("FE Rings: \n---------\n");
+ for ( i = 0; i < 50; i++) {
+ blkif = blkif_find_by_handle((domid_t)i, 0);
+ if (blkif != NULL)
+ WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n", i,
+ blkif->blk_ring.req_cons,
+ blkif->blk_ring.rsp_prod_pvt,
+ blkif->blk_ring.sring->req_prod,
+ blkif->blk_ring.sring->rsp_prod);
}
-
- return 0;
-}
+ WPRINTK("BE Ring: \n--------\n");
+ WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_be_ring.rsp_cons,
+ blktap_be_ring.req_prod_pvt,
+ blktap_be_ring.sring->req_prod,
+ blktap_be_ring.sring->rsp_prod);
+}
unsigned long mmap_vstart;
unsigned long rings_vstart;
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t blktap_ube_ring;
+
/* -------[ blktap vm ops ]------------------------------------------- */
static struct page *blktap_nopage(struct vm_area_struct *vma,
static int blktap_open(struct inode *inode, struct file *filp)
{
+ blkif_sring_t *sring;
+
if ( test_and_set_bit(0, &blktap_dev_inuse) )
return -EBUSY;
printk(KERN_ALERT "blktap open.\n");
/* Allocate the fe ring. */
- fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
- if (fe_ring.ring == NULL)
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
goto fail_nomem;
- SetPageReserved(virt_to_page(fe_ring.ring));
+ SetPageReserved(virt_to_page(sring));
- fe_ring.ring->req_prod = fe_ring.ring->resp_prod
- = fe_ring.req_prod
- = fe_ring.rsp_cons
- = 0;
+ SHARED_RING_INIT(BLKIF_RING, sring);
+ FRONT_RING_INIT(BLKIF_RING, &blktap_ufe_ring, sring);
/* Allocate the be ring. */
- be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
- if (be_ring.ring == NULL)
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
goto fail_free_fe;
- SetPageReserved(virt_to_page(be_ring.ring));
+ SetPageReserved(virt_to_page(sring));
- be_ring.ring->req_prod = be_ring.ring->resp_prod
- = be_ring.rsp_prod
- = be_ring.req_cons
- = 0;
+ SHARED_RING_INIT(BLKIF_RING, sring);
+ BACK_RING_INIT(BLKIF_RING, &blktap_ube_ring, sring);
DPRINTK(KERN_ALERT "blktap open.\n");
return 0;
fail_free_fe:
- free_page( (unsigned long) fe_ring.ring);
+ free_page( (unsigned long) blktap_ufe_ring.sring);
fail_nomem:
return -ENOMEM;
printk(KERN_ALERT "blktap closed.\n");
/* Free the ring page. */
- ClearPageReserved(virt_to_page(fe_ring.ring));
- free_page((unsigned long) fe_ring.ring);
+ ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ free_page((unsigned long) blktap_ufe_ring.sring);
- ClearPageReserved(virt_to_page(be_ring.ring));
- free_page((unsigned long) be_ring.ring);
+ ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+ free_page((unsigned long) blktap_ube_ring.sring);
return 0;
}
/* not sure if I really need to do this... */
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
- if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE,
- vma->vm_page_prot)) {
- printk(KERN_ERR "be_ring: remap_page_range failure!\n");
+ DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+ if (remap_page_range(vma, vma->vm_start,
+ __pa(blktap_ube_ring.sring),
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("be_ring: remap_page_range failure!\n");
}
- DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
- if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring),
+ DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+ if (remap_page_range(vma, vma->vm_start + PAGE_SIZE,
+ __pa(blktap_ufe_ring.sring),
PAGE_SIZE, vma->vm_page_prot)) {
- printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
+ WPRINTK("fe_ring: remap_page_range failure!\n");
}
blktap_vma = vma;
printk(KERN_INFO "blktap: set mode to %lx\n", arg);
return 0;
}
- /* XXX: return a more meaningful error case here. */
+ case BLKTAP_IOCTL_PRINT_IDXS:
+ {
+ print_vm_ring_idxs();
+ WPRINTK("User Rings: \n-----------\n");
+ WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ufe_ring.rsp_cons,
+ blktap_ufe_ring.req_prod_pvt,
+ blktap_ufe_ring.sring->req_prod,
+ blktap_ufe_ring.sring->rsp_prod);
+ WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ube_ring.req_cons,
+ blktap_ube_ring.rsp_prod_pvt,
+ blktap_ube_ring.sring->req_prod,
+ blktap_ube_ring.sring->rsp_prod);
+
+ }
}
return -ENOIOCTLCMD;
}
{
poll_wait(file, &blktap_wait, wait);
- if ( (fe_ring.req_prod != fe_ring.ring->req_prod) ||
- (be_ring.rsp_prod != be_ring.ring->resp_prod) ) {
+ if ( RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_ufe_ring) ||
+ RING_HAS_UNPUSHED_RESPONSES(BLKIF_RING, &blktap_ube_ring) ) {
- fe_ring.ring->req_prod = fe_ring.req_prod;
- be_ring.ring->resp_prod = be_ring.rsp_prod;
+ RING_PUSH_REQUESTS(BLKIF_RING, &blktap_ufe_ring);
+ RING_PUSH_RESPONSES(BLKIF_RING, &blktap_ube_ring);
return POLLIN | POLLRDNORM;
}
release: blktap_release,
mmap: blktap_mmap,
};
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+ blkif_request_t *target;
+ int error, i;
+ /*
+ * This is called to pass a request from the real frontend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: ufe_ring not ready for a request!\n");
+ return 0;
+ }
+
+ if ( RING_FULL(BLKIF_RING, &blktap_ufe_ring) ) {
+ DPRINTK("blktap: fe_ring is full, can't add.\n");
+ return 0;
+ }
+
+ //target = RING_NEXT_EMPTY_REQUEST(BLKIF_RING, &blktap_ufe_ring);
+ target = RING_GET_REQUEST(BLKIF_RING, &blktap_ufe_ring,
+ blktap_ufe_ring.req_prod_pvt);
+ memcpy(target, req, sizeof(*req));
+
+ /* Attempt to map the foreign pages directly in to the application */
+ for (i=0; i<target->nr_segments; i++) {
+
+ error = direct_remap_area_pages(blktap_vma->vm_mm,
+ MMAP_VADDR(ID_TO_IDX(req->id), i),
+ target->frame_and_sects[0] & PAGE_MASK,
+ PAGE_SIZE,
+ blktap_vma->vm_page_prot,
+ ID_TO_DOM(req->id));
+ if ( error != 0 ) {
+ printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+ /* the request is now dropped on the floor. */
+ return 0;
+ }
+ }
+
+ blktap_ufe_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *target;
+
+ /*
+ * This is called to pass a response from the real backend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ //target = RING_NEXT_EMPTY_RESPONSE(BLKIF_RING, &blktap_ube_ring);
+ target = RING_GET_RESPONSE(BLKIF_RING, &blktap_ube_ring,
+ blktap_ube_ring.rsp_prod_pvt);
+ memcpy(target, rsp, sizeof(*rsp));
+
+ /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+ blktap_ube_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
+int blktap_read_fe_ring(void)
+{
+ /* This is called to read responses from the UFE ring. */
+
+ RING_IDX i, rp;
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ active_req_t *ar;
+
+ DPRINTK("blktap_read_fe_ring()\n");
+
+ /* if we are forwarding from UFE ring to FE ring */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+ /* for each outstanding message on the UFEring */
+ //RING_FOREACH_RESPONSE(BLKIF_RING, &blktap_ufe_ring, prod, resp_s) {
+ rp = blktap_ufe_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+ {
+ resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_ufe_ring, i);
+
+ DPRINTK("resp->fe_ring\n");
+ ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+ blkif = ar->blkif;
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+ }
+
+ blktap_ufe_ring.rsp_cons = i;
+ }
+ return 0;
+}
+
+int blktap_read_be_ring(void)
+{
+ /* This is called to read requests from the UBE ring. */
+
+ RING_IDX i, rp;
+ blkif_request_t *req_s;
+
+ DPRINTK("blktap_read_be_ring()\n");
+
+ /* if we are forwarding from UBE ring to BE ring */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+ /* for each outstanding message on the UBEring */
+ //RING_FOREACH_REQUEST(BLKIF_RING, &blktap_ube_ring, prod, req_s) {
+ rp = blktap_ube_ring.sring->req_prod;
+ rmb();
+ for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+ {
+ req_s = RING_GET_REQUEST(BLKIF_RING, &blktap_ube_ring, i);
+
+ DPRINTK("req->be_ring\n");
+ write_req_to_be_ring(req_s);
+ kick_be_domain();
+ }
+
+ blktap_ube_ring.req_cons = i;
+ }
+
+ return 0;
+}
/* -------[ blktap module setup ]------------------------------------- */
static struct miscdevice blktap_miscdev = {